library(tidyverse)
library(gt)
library(broom)
library(multiwayvcov)
library(ggpubr)

##### SETUP DATA #####

# `x` is dist
# `z` is expansion
# `Z` is high eligibility (above median)
# `Zx` is high eligibility * dist
# `zx` is expansion * dist
# `Zz` is high eligibility * expansion
# `Zzx` is high eligibility * dist * expansion

with_leip <- read_rds("dataset_leip_dataverse.rds") %>% 
  filter(abs(x) < 100) %>% 
  mutate(`Z` = pcteligible2013,
         `Z` = `Z` >= median(`Z`),
         `Zx` = `Z` * `x`,
         `Zz` = `Z` * `z`,
         `Zzx` = `Z` * `zx`)

##### MODELS #####

# Model for 2014-2010 reg, no covariates

model_1410 <- lm(data = with_leip,
                 formula = dregistration20142010 ~ `z` + `Z` + `Zz` + `x` + `zx` + `Zx` + `Zzx`)

# Simulate two states
# Both HIGH eligibility, median dist from border, differ in expansion

sim_data <- tibble(`z` = c(1, 0),
                   `Z` = TRUE,
                   `Zz` = `Z` * `z`,
                   `x` = c(median(filter(with_leip, x > 0)$x), median(filter(with_leip, x < 0)$x)),
                   `zx` = `z` * `x`,
                   `Zx` = `Z` * `x`,
                   `Zzx` = `Z` * `zx`)

# Simulate coefs, mu, and Y

sim_coeffs <- mvtnorm::rmvnorm(n = 10000, 
                               mean = model_1410$coeff,
                               sigma = cluster.boot(model_1410, cluster = with_leip$fipsstate,
                                                    boot_type = "wild", R = 500))
mu_1410 <- cbind(1, as.matrix(sim_data)) %*% t(sim_coeffs)
reg_1410 <- rnorm(10000, mean = as_vector(mu_1410[1,]), sd = summary(model_1410)$sigma) %>% 
  as_tibble() %>% 
  bind_cols(rnorm(10000, mean = as_vector(mu_1410[2,]), sd = summary(model_1410)$sigma)) %>% 
  rename(yes = 1, no = 2) %>% 
  pivot_longer(cols = everything(), names_to = "expansion", values_to = "estimate")

mean_1410 <- reg_1410 %>% 
  group_by(expansion) %>% 
  summarize(mean = mean(estimate))

p1 <- reg_1410 %>% 
  ggplot() +
  geom_density(aes(estimate, fill = expansion), alpha = 0.5) +
  geom_vline(data = mean_1410,
             aes(xintercept = mean, color = expansion)) +
  scale_fill_discrete(name = "Expansion", labels = c("No", "Yes")) +
  scale_color_discrete(name = "Expansion", labels = c("No", "Yes")) +
  theme_bw() +
  labs(
    title = "High Eligibility",
    x = "Simulated Change",
    y = "Density"
  )

# LOW eligibility

sim_data <- tibble(`z` = c(1, 0),
                   `Z` = FALSE,
                   `Zz` = `Z` * `z`,
                   `x` = c(median(filter(with_leip, x > 0)$x), median(filter(with_leip, x < 0)$x)),
                   `zx` = `z` * `x`,
                   `Zx` = `Z` * `x`,
                   `Zzx` = `Z` * `zx`)

# Simulate coefs, mu, and Y

sim_coeffs <- mvtnorm::rmvnorm(n = 10000, 
                               mean = model_1410$coeff,
                               sigma = cluster.boot(model_1410, cluster = with_leip$fipsstate,
                                                    boot_type = "wild", R = 500))
mu_1410 <- cbind(1, as.matrix(sim_data)) %*% t(sim_coeffs)
reg_1410 <- rnorm(10000, mean = as_vector(mu_1410[1,]), sd = summary(model_1410)$sigma) %>% 
  as_tibble() %>% 
  bind_cols(rnorm(10000, mean = as_vector(mu_1410[2,]), sd = summary(model_1410)$sigma)) %>% 
  rename(yes = 1, no = 2) %>% 
  pivot_longer(cols = everything(), names_to = "expansion", values_to = "estimate")

mean_1410 <- reg_1410 %>% 
  group_by(expansion) %>% 
  summarize(mean = mean(estimate))

p2 <- reg_1410 %>% 
  ggplot() +
  geom_density(aes(estimate, fill = expansion), alpha = 0.5) +
  geom_vline(data = mean_1410,
             aes(xintercept = mean, color = expansion)) +
  scale_fill_discrete(name = "Expansion", labels = c("No", "Yes")) +
  scale_color_discrete(name = "Expansion", labels = c("No", "Yes")) +
  theme_bw() +
  labs(
    title = "Low Eligibility",
    x = "Simulated Change",
    y = "Density"
  )

arr1 <- annotate_figure(ggarrange(p1, p2),
                top = text_grob("Change in Registration 2014-2010", size = 14, face = "bold")
                )

# Model for 2016-2012 reg, no covariates

model_1612 <- lm(data = with_leip,
                 formula = dregistration20162012 ~ `z` + `Z` + `Zz` + `x` + `zx` + `Zx` + `Zzx`)

# HIGH eligibility

sim_data <- tibble(`z` = c(1, 0),
                   `Z` = TRUE,
                   `Zz` = `Z` * `z`,
                   `x` = c(median(filter(with_leip, x > 0)$x), median(filter(with_leip, x < 0)$x)),
                   `zx` = `z` * `x`,
                   `Zx` = `Z` * `x`,
                   `Zzx` = `Z` * `zx`)

sim_coeffs <- mvtnorm::rmvnorm(n = 10000, 
                               mean = model_1612$coeff,
                               sigma = cluster.boot(model_1612, cluster = with_leip$fipsstate,
                                                    boot_type = "wild", R = 500))
mu_1612 <- cbind(1, as.matrix(sim_data)) %*% t(sim_coeffs)
reg_1612 <- rnorm(10000, mean = as_vector(mu_1612[1,]), sd = summary(model_1612)$sigma) %>% 
  as_tibble() %>% 
  bind_cols(rnorm(10000, mean = as_vector(mu_1612[2,]), sd = summary(model_1612)$sigma)) %>% 
  rename(yes = 1, no = 2) %>% 
  pivot_longer(cols = everything(), names_to = "expansion", values_to = "estimate")

mean_1612 <- reg_1612 %>% 
  group_by(expansion) %>% 
  summarize(mean = mean(estimate))

p3 <- reg_1612 %>% 
  ggplot() +
  geom_density(aes(estimate, fill = expansion), alpha = 0.5) +
  geom_vline(data = mean_1612,
             aes(xintercept = mean, color = expansion)) +
  scale_fill_discrete(name = "Expansion", labels = c("No", "Yes")) +
  scale_color_discrete(name = "Expansion", labels = c("No", "Yes")) +
  theme_bw() +
  labs(
    title = "High Eligibility",
    x = "Simulated Change",
    y = "Density"
  )

# LOW eligibility

sim_data <- tibble(`z` = c(1, 0),
                   `Z` = FALSE,
                   `Zz` = `Z` * `z`,
                   `x` = c(median(filter(with_leip, x > 0)$x), median(filter(with_leip, x < 0)$x)),
                   `zx` = `z` * `x`,
                   `Zx` = `Z` * `x`,
                   `Zzx` = `Z` * `zx`)

sim_coeffs <- mvtnorm::rmvnorm(n = 10000, 
                               mean = model_1612$coeff,
                               sigma = cluster.boot(model_1612, cluster = with_leip$fipsstate,
                                                    boot_type = "wild", R = 500))
mu_1612 <- cbind(1, as.matrix(sim_data)) %*% t(sim_coeffs)
reg_1612 <- rnorm(10000, mean = as_vector(mu_1612[1,]), sd = summary(model_1612)$sigma) %>% 
  as_tibble() %>% 
  bind_cols(rnorm(10000, mean = as_vector(mu_1612[2,]), sd = summary(model_1612)$sigma)) %>% 
  rename(yes = 1, no = 2) %>% 
  pivot_longer(cols = everything(), names_to = "expansion", values_to = "estimate")

mean_1612 <- reg_1612 %>% 
  group_by(expansion) %>% 
  summarize(mean = mean(estimate))

p4 <- reg_1612 %>%
  ggplot() +
  geom_density(aes(estimate, fill = expansion), alpha = 0.5) +
  geom_vline(data = mean_1612,
             aes(xintercept = mean, color = expansion)) +
  scale_fill_discrete(name = "Expansion", labels = c("No", "Yes")) +
  scale_color_discrete(name = "Expansion", labels = c("No", "Yes")) +
  theme_bw() +
  labs(
    title = "High Eligibility",
    x = "Simulated Change",
    y = "Density"
  )

arr2 <- annotate_figure(ggarrange(p3, p4),
                top = text_grob("Change in Registration 2016-2012", size = 14, face = "bold")
                )

##### Display figure #####

ggarrange(arr1, arr2, nrow = 2)
